VERSION 1.0 CLASS
BEGIN
  MultiUse = -1  'True
  Persistable = 0  'NotPersistable
  DataBindingBehavior = 0  'vbNone
  DataSourceBehavior  = 0  'vbNone
  MTSTransactionMode  = 0  'NotAnMTSObject
END
Attribute VB_Name = "CTextExtraction"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = True
Attribute VB_PredeclaredId = False
Attribute VB_Exposed = False
Option Explicit

Private Const MAX_LINE_ERROR As Double = 4#  ' This must be the square of the allowed error (2 * 2 in this case).

Private Enum TTextDir
   tfLeftToRight = 0
   tfRightToLeft = 1
   tfTopToBottom = 2
   tfBottomToTop = 4
   tfNotInitialized = -1
End Enum
Private m_File As Long
Private m_HavePos As Boolean
Private m_LastTextDir As TTextDir
Private m_LastTextEndX As Double
Private m_LastTextEndY As Double
Private m_LastTextInfX As Double
Private m_LastTextInfY As Double
Private m_NewLine As String
Private m_PDF As CPDF
Private m_Space As String
Private m_Stack As TPDFStack

Private m_Templates() As Long
Private m_Templates_Capacity As Long
Private m_Templates_Count As Long

Private Sub Class_Initialize()
   m_LastTextDir = tfNotInitialized
   m_NewLine = StrConv(Chr(13) + Chr(10), vbUnicode)
   m_Space = StrConv(" ", vbUnicode)
End Sub

Sub Class_Terminate()
   Erase m_Templates
End Sub

Private Function CalcDistance(ByVal x1 As Double, ByVal y1 As Double, ByVal x2 As Double, ByVal y2 As Double) As Double
   Dim dx As Double, dy As Double
   dx = x2 - x1
   dy = y2 - y1
   CalcDistance = Sqr(dx * dx + dy * dy)
End Function

Private Function AddText() As Long
   On Error GoTo err

   Dim i As Long
   Dim textDir As TTextDir
   Dim x As Double
   Dim x1 As Double, y1 As Double, x2 As Double, y2 As Double
   x1 = 0#
   y1 = 0#
   x2 = 0#
   y2 = m_Stack.FontSize
   ' Transform the text matrix to user space
   Dim m As TCTM
   m = MulMatrix(m_Stack.ctm, m_Stack.tm)
   Call Transform(m, x1, y1) ' Start point of the text record
   'The second point to determine the text direction can also be used to
   'calculate the visible font size measured in user space:
   '  Dim realFontSize as Double = CalcDistance(x1, y1, x2, y2)
   Call Transform(m, x2, y2) ' Second point to determine the text direction
   ' Determine the text direction
   If y1 = y2 Then
      If x1 > x2 Then
         textDir = tfBottomToTop
      Else
         textDir = tfTopToBottom
      End If
   Else
      If y1 > y2 Then
         textDir = tfRightToLeft
      Else
         textDir = tfLeftToRight
      End If
   End If
   If textDir <> m_LastTextDir Then
      ' Extend the x-coordinate to an infinite point
      m_LastTextInfX = 1000000#
      m_LastTextInfY = 0#
      Call Transform(m, m_LastTextInfX, m_LastTextInfY)
      If m_LastTextDir <> TTextDir.tfNotInitialized Then
         ' Add a new line to the output file
         Put m_File, , m_NewLine
      End If
   ElseIf Not IsPointOnLine(x1, y1, m_LastTextEndX, m_LastTextEndY, m_LastTextInfX, m_LastTextInfY) Then
      ' Extend the x-coordinate to an infinite point
      m_LastTextInfX = 1000000#
      m_LastTextInfY = 0#
      Call Transform(m, m_LastTextInfX, m_LastTextInfY)
      If m_LastTextDir <> TTextDir.tfNotInitialized Then
         ' Add a new line to the output file
         Put m_File, , m_NewLine
      End If
   Else
      'The space width is measured in text space but the distance between two text
      'records is measured in user space! We must transform the space width to user
      'space before we can compare the values.
      Dim distance As Double, SpaceWidth As Double
      ' Note that we use the full space width here because the end position of the last record
      ' was set to the record width minus the half space width.
      Dim x3 As Double, y3 As Double
      x3 = m_Stack.SpaceWidth
      y3 = 0#
      Call Transform(m, x3, y3)
      SpaceWidth = CalcDistance(x1, y1, x3, y3)
      distance = CalcDistance(m_LastTextEndX, m_LastTextEndY, x1, y1)
      If distance > SpaceWidth Then
         'Add a space to the output file
         Put m_File, , m_Space
      End If
   End If
   ' We use the half space width to determine whether a space must be inserted at
   ' a specific position. This produces better results in most cases.
   Dim s As String
   Dim spw As Single
   spw = -m_Stack.SpaceWidth * 0.5
   If m_Stack.FontSize < 0.0 Then spw = -spw
   For i = 0 To UBound(m_Stack.Kerning)
      If m_Stack.Kerning(i).Advance < spw Then
         ' Add a space to the output file
         Put m_File, , m_Space
      End If
      s = StrConv(m_Stack.Kerning(i).Text, vbUnicode)
      Put m_File, , s
   Next i
   ' We don't set the cursor to the real end of the string because applications like MS Word
   ' add often a space to the end of a text record and this space can slightly overlap the next
   ' record. IsPointOnLine() would return false in this case.
   m_LastTextEndX = m_Stack.TextWidth + spw ' spw is a negative value!
   m_LastTextEndY = 0#
   m_LastTextDir = textDir
   ' Calculate the end coordinate of the text record
   Call Transform(m, m_LastTextEndX, m_LastTextEndY)
   AddText = 0
   Exit Function
err:
   Resume Next
   AddText = -1
End Function

Public Sub CloseFile()
   If m_File <> 0 Then
      Close m_File
      m_File = 0
   End If
End Sub

 Private Function IsPointOnLine(ByVal x As Double, ByVal y As Double, ByVal x0 As Double, ByVal y0 As Double, ByVal x1 As Double, ByVal y1 As Double) As Boolean
    Dim dx As Double, dy As Double, di As Double
    x = x - x0
    y = y - y0
    dx = x1 - x0
    dy = y1 - y0
    di = (x * dx + y * dy) / (dx * dx + dy * dy)
    If di < 0# Then
       di = 0#
    ElseIf di > 1# Then
       di = 1#
    End If
    dx = x - di * dx
    dy = y - di * dy
    di = dx * dx + dy * dy
    IsPointOnLine = (di < MAX_LINE_ERROR)
 End Function

 Private Function MulMatrix(ByRef M1 As TCTM, ByRef M2 As TCTM) As TCTM
    Dim retval As TCTM
    retval.a = M2.a * M1.a + M2.b * M1.c
    retval.b = M2.a * M1.b + M2.b * M1.d
    retval.c = M2.c * M1.a + M2.d * M1.c
    retval.d = M2.c * M1.b + M2.d * M1.d
    retval.x = M2.x * M1.a + M2.y * M1.c + M1.x
    retval.y = M2.x * M1.b + M2.y * M1.d + M1.y
    MulMatrix = retval
 End Function

Public Sub OpenFile(ByVal FileName As String)
   Dim s As String
   m_File = FreeFile
   If Dir(FileName) <> "" Then Kill FileName
   Open FileName For Binary Access Write As m_File
   s = StrConv(ChrW$(&HFEFF), vbUnicode)
   Put m_File, , s
End Sub

Public Sub ParsePage()
   ClearTemplates
   If Not m_PDF.InitStack(m_Stack) Then Return
   m_LastTextEndX = 0#
   m_LastTextEndY = 0#
   m_LastTextDir = tfNotInitialized
   m_LastTextInfX = 0#
   m_LastTextInfY = 0#

   ParseText
   ParseTemplates
End Sub

' Templates are parsed recursively.
Private Sub ParseTemplates()
   Dim i As Long, j As Long, tmpl As Long
   For i = 0 To m_PDF.GetTemplCount() - 1
      If Not m_PDF.EditTemplate(i) Then Return
      tmpl = m_PDF.GetTemplHandle()
      If FindTemplate(tmpl) < 0 Then
         AddTemplate (tmpl)

         If Not m_PDF.InitStack(m_Stack) Then Return

         ParseText

         For j = 0 To m_PDF.GetTemplCount() - 1
            ParseTemplates
         Next j
         m_PDF.EndTemplate
      Else
         m_PDF.EndTemplate
      End If
   Next i
End Sub

Private Sub ParseText()
   Dim haveMore As Boolean
   ' Get the first text record if any
   haveMore = m_PDF.GetPageText(m_Stack)
   ' No text found?
   If Not haveMore And (Len(m_Stack.Text) = 0) Then Exit Sub
   AddText
   If haveMore Then
      Do While m_PDF.GetPageText(m_Stack)
         AddText
      Loop
   End If
End Sub

Public Sub SetPDFInst(ByVal PDFInst As CPDF)
   Set m_PDF = PDFInst
End Sub

Private Sub Transform(ByRef m As TCTM, ByRef x As Double, ByRef y As Double)
   Dim tx As Double
   tx = x
   x = tx * m.a + y * m.c + m.x
   y = tx * m.b + y * m.d + m.y
End Sub

Public Sub WritePageIdentifier(ByVal PageNum As Long)
   Dim s As String
   If PageNum > 1 Then
      Put m_File, , m_NewLine
   End If
   s = StrConv("%----------------------- Page " + Str(PageNum) + " -----------------------------" + Chr(13) + Chr(10), vbUnicode)
   Put m_File, , s
End Sub

Private Sub AddTemplate(ByVal Templ As Long)
   If m_Templates_Count = m_Templates_Capacity Then
      ReDim m_Templates(m_Templates_Capacity + 63)
      m_Templates_Capacity = m_Templates_Capacity + 64
   End If
   m_Templates(m_Templates_Count) = Templ
   m_Templates_Count = m_Templates_Count + 1
End Sub

Private Sub ClearTemplates()
   m_Templates_Count = 0
End Sub

Private Function FindTemplate(ByVal tmpl As Long) As Long
   Dim i As Long
   Dim e As Long
   i = 0
   e = m_Templates_Count - 1
   While i <= e
      If m_Templates(i) = tmpl Then
         FindTemplate = i
         Exit Function
      End If
      If m_Templates(e) = tmpl Then
         FindTemplate = e
         Exit Function
      End If
      i = i + 1
      e = e - 1
   Wend
   FindTemplate = -1
End Function
